*******************************************************************
* Purpose: Prepare representative data sets for creating weights  *
* Author: Peter Kuhn, Trevor Osaki, Lei Yue                       *
* Date: September 2024                                            *
*******************************************************************


* Work with ACS Data

* Root folder of the replication package; edit this path to match the local machine.
* The path is wrapped in a single pair of double quotes so the macro holds the bare path.
global homedir "/Users/leiyue/Desktop/Projects/WhenIsDicrimUnfair/Replication_Final/"

* Quote the macro when expanding it so the cd still works if the root path contains spaces.
cd "$homedir"
cd "1_Dataset Construction/jj_Materials for Weights"

* Load the ACS extract. -use- takes -clear- (not -replace-) to discard data already in memory.
use "acs2019.dta", clear

* Sample restriction: exclude region code 9, and keep only respondents aged 18+
* whose age is not system-missing (.).
keep if region != 9
keep if age >= 18 & age != .

* Retain only the person weight and the demographic variables used below.
keep perwgt age female wbhapom educ08

* Make race dummies: one 0/1 indicator per wbhapom code 1-7, named in code order
local race_names white black hispanic asian islander indigenous other
local race_code = 0
foreach race_name of local race_names {
    local ++race_code
    gen `race_name' = (wbhapom == `race_code')
}

* Single indicator covering every category other than white and black (codes 3-7)
gen other_combined = inlist(wbhapom, 3, 4, 5, 6, 7)

* Make dummies for education, splitting educ08 at codes 17 / 20 / 21
* (a missing educ08 yields 0 in all four indicators)
gen edu_hs    = (educ08 <= 17)
gen edu_2year = inrange(educ08, 18, 20)
gen edu_4year = (educ08 == 21)
gen edu_grad  = inlist(educ08, 22, 23, 24)


* Make dummies for age: six mutually exclusive bands, the last one open-ended
gen age18_24   = inrange(age, 18, 24)
gen age25_34   = inrange(age, 25, 34)
gen age35_44   = inrange(age, 35, 44)
gen age45_54   = inrange(age, 45, 54)
gen age55_64   = inrange(age, 55, 64)
gen age65_over = (age >= 65)


* Make categorical variables (coarse groupings built from the dummies above)

* Three-way race coding, currently disabled:
*gen race = (white == 1)
*replace race = 2 if black == 1
*replace race = 3 if other_combined == 1

* gender2: 1 if female == 0, 2 if female == 1, 0 otherwise
gen gender2 = cond(female == 1, 2, female == 0)

* race2: 1 if white == 1, 2 if white == 0, 0 otherwise
gen race2 = (white == 1) + 2 * (white == 0)

* age2: 1 = 18-24, 2 = 25-44, 3 = 45 and over, 0 if no band applies
gen age2 = cond(age45_54 == 1 | age55_64 == 1 | age65_over == 1, 3, ///
           cond(age25_34 == 1 | age35_44 == 1, 2, age18_24 == 1))

* edu2: 1 = high-school or two-year band, 2 = four-year or graduate band, 0 otherwise
gen edu2 = cond(edu_4year == 1 | edu_grad == 1, 2, (edu_hs == 1 | edu_2year))

* Write the prepared ACS data to a delimited text file, then empty memory
export delimited using "acs_data", replace

clear

* Work with GSS Data

* Load the GSS file. -use- takes -clear- (not -replace-) to discard data already in memory.
use "gss2020.dta", clear

* Clean up the data file: keep only observations with panstat equal to 1
keep if panstat == 1

* Drop the _1a / _1b versions of age, degree and sex, plus partyid_2 and the _2 weight variables
drop age_1a degree_1a sex_1a age_1b degree_1b sex_1b partyid_2 wtssall_2 wtss_2 

* Build a single _2 version of each race variable: take the _1a value when
* samptype is 2016 and the _1b value when samptype is 2018; any other
* samptype keeps the initial value of 0.
local race_stems race racecen1 racecen2 racecen3
foreach stem of local race_stems {
    gen `stem'_2 = 0
    replace `stem'_2 = `stem'_1a if samptype == 2016
    replace `stem'_2 = `stem'_1b if samptype == 2018
}

* The sample-specific source columns are no longer needed
drop race_1a racecen1_1a racecen2_1a racecen3_1a race_1b racecen1_1b racecen2_1b racecen3_1b


* Drop non-responses
* Values above the top valid code are removed; because Stata treats missing
* values as larger than any number, these conditions also drop missing responses.
* The full _2 variable names are spelled out rather than relying on variable-name
* abbreviation: "polviews" alone is ambiguous while polviews_1a / polviews_1b
* are still in the data, and the rest of this script uses the _2 names.
* NOTE(review): assumes the file has no variables literally named degree or polviews - confirm.
drop if degree_2 > 5
drop if polviews_2 > 7
drop if sex_2 > 2


* Make dummies for political leaning
* Names are listed from polviews_2 == 7 down to polviews_2 == 1.
local leanings extremely_conservative conservative slightly_conservative ///
    moderate slightly_liberal liberal extremely_liberal

local pv_code = 7
foreach lean of local leanings {
    gen `lean' = (polviews_2 == `pv_code')
    local --pv_code
}

* views2 numbers the same categories in list order:
* 1 = extremely conservative, ..., 7 = extremely liberal (0 if none applies)
gen views2 = 0
local rank = 0
foreach lean of local leanings {
    local ++rank
    replace views2 = `rank' if `lean' == 1
}


* Write the prepared GSS data to a delimited text file
export delimited using "gss_data", replace
